Load the data set Mode from the package mlogit. To do this, you first need to load the package. The data set contains choices about mode of transportation, and it is loaded as follows:

# Load the `Mode` data set directly from the mlogit package. Naming the
# package explicitly means the call also works when mlogit has not been
# attached with library() beforehand (it only needs to be installed).
data("Mode", package = "mlogit")

Once you have loaded the data set, describe it and answer the following questions: How many variables are there, and of which type is each (i.e., categorical or quantitative)?

# Compact overview: dimensions plus the type and first values of each column.
Mode |> glimpse()
## Rows: 453
## Columns: 9
## $ choice       <fct> car, rail, car, car, car, car, car, car, bus, car, rail, …
## $ cost.car     <dbl> 1.5070097, 6.0569985, 5.7946769, 1.8691439, 2.4989523, 4.…
## $ cost.carpool <dbl> 2.3356118, 2.8969191, 2.1374543, 2.5724266, 1.7220099, 0.…
## $ cost.bus     <dbl> 1.800512, 2.237128, 2.576385, 1.903518, 2.686000, 1.84765…
## $ cost.rail    <dbl> 2.358920, 1.855450, 2.747479, 2.268276, 2.973866, 2.31005…
## $ time.car     <dbl> 18.503200, 31.311107, 22.547429, 26.090282, 4.699140, 3.0…
## $ time.carpool <dbl> 26.338233, 34.256956, 23.255171, 29.896023, 12.414084, 9.…
## $ time.bus     <dbl> 20.86779, 67.18189, 63.30906, 19.75270, 43.09204, 12.8256…
## $ time.rail    <dbl> 30.03347, 60.29313, 49.17164, 13.47268, 39.74325, 43.5442…

How many different modes of transportation are in this data set? What is the most popular mode? What is the least popular mode?

# Per-column summary: level counts for the factor `choice`, and
# min / quartiles / median / mean / max for the numeric columns.
Mode |> summary()
##      choice       cost.car       cost.carpool       cost.bus    
##  car    :218   Min.   :0.4099   Min.   :0.1293   Min.   :1.013  
##  carpool: 32   1st Qu.:3.6964   1st Qu.:0.9519   1st Qu.:1.783  
##  bus    : 81   Median :4.8796   Median :1.6665   Median :2.027  
##  rail   :122   Mean   :4.8735   Mean   :1.6863   Mean   :2.036  
##                3rd Qu.:6.2255   3rd Qu.:2.4581   3rd Qu.:2.321  
##                Max.   :8.8555   Max.   :3.2953   Max.   :2.740  
##    cost.rail        time.car       time.carpool       time.bus     
##  Min.   :1.272   Min.   : 2.404   Min.   : 8.385   Min.   : 1.969  
##  1st Qu.:1.947   1st Qu.:21.835   1st Qu.:28.391   1st Qu.:25.457  
##  Median :2.198   Median :37.497   Median :40.637   Median :41.415  
##  Mean   :2.212   Mean   :37.044   Mean   :39.771   Mean   :39.923  
##  3rd Qu.:2.476   3rd Qu.:53.104   3rd Qu.:51.843   3rd Qu.:52.805  
##  Max.   :3.113   Max.   :66.871   Max.   :65.009   Max.   :75.681  
##    time.rail     
##  Min.   : 4.621  
##  1st Qu.:28.143  
##  Median :40.034  
##  Mean   :39.505  
##  3rd Qu.:49.172  
##  Max.   :73.998
# Bar chart of how often each mode was chosen, one fill colour per mode.
# The legend is suppressed because the x axis already names the modes.
ggplot(data = Mode, mapping = aes(x = choice, fill = choice)) +
  geom_bar(colour = "black", show.legend = FALSE) +
  theme_bw()

In general, what is the most expensive mode? The least expensive?

# Overlaid density curves of the cost of each mode. Every layer maps `fill`
# to a constant label so that the manual fill scale below can assign the
# colours and build a single legend titled "Mode".
ggplot(data = Mode) +
  geom_density(mapping = aes(x = cost.car, fill = "car"), colour = "black", alpha = 0.5) +
  geom_density(mapping = aes(x = cost.carpool, fill = "carpool"), colour = "black", alpha = 0.5) +
  geom_density(mapping = aes(x = cost.bus, fill = "bus"), colour = "black", alpha = 0.5) +
  geom_density(mapping = aes(x = cost.rail, fill = "rail"), colour = "black", alpha = 0.5) +
  scale_fill_manual(
    name = "Mode",
    values = c(
      car = "firebrick",
      carpool = "dodgerblue",
      bus = "darkgoldenrod2",
      rail = "cyan"
    )
  ) +
  labs(x = "Cost") +
  theme_bw()

Create a plot showing the univariate distributions of time by car and time by bus. Discuss.

# Overlaid density curves of the travel time of each mode, stored in
# `grafica_time`. Note that all four modes are drawn, not only car and bus.
# Every layer maps `fill` to a constant label so that the manual fill scale
# can assign the colours and build a single legend titled "Mode".
grafica_time <- ggplot(data = Mode) +
  geom_density(mapping = aes(x = time.car, fill = "car"), colour = "black", alpha = 0.5) +
  geom_density(mapping = aes(x = time.carpool, fill = "carpool"), colour = "black", alpha = 0.5) +
  geom_density(mapping = aes(x = time.bus, fill = "bus"), colour = "black", alpha = 0.5) +
  geom_density(mapping = aes(x = time.rail, fill = "rail"), colour = "black", alpha = 0.5) +
  scale_fill_manual(
    name = "Mode",
    values = c(
      car = "firebrick",
      carpool = "dodgerblue",
      bus = "darkgoldenrod2",
      rail = "cyan"
    )
  ) +
  labs(x = "Time") +
  theme_bw()

# Convert the static ggplot into an interactive plotly widget.
grafica_time |> ggplotly()

How do choices relate to the cost of the different modes?

# Draw one box plot per numeric column of `Mode` (the cost.* and time.*
# variables), split by the chosen mode.

# Names of the numeric columns.
varnum <- Mode |>
  dplyr::select(where(is.numeric)) |>
  names()

for (var in varnum) {
  # `.data[[var]]` looks the column up by the name held in `var`. This is the
  # tidy-evaluation idiom for string column names and replaces the manual
  # eval(as.name(var)) construction.
  grafica_box <- ggplot(Mode) +
    geom_boxplot(aes(choice, .data[[var]], fill = choice), show.legend = FALSE) +
    ylab(var) +
    theme_bw()

  # Plots are not auto-printed inside a for loop, so print explicitly.
  print(grafica_box)
}